/* 
    Purpose: Using the 1950 Census, this file takes cleaned variables 
             from 2a and calculates average predicted father income 
             (i.e., "income scores") at various levels.

    Notes: (1) No income scores are made that vary by the 4 Census regions 
               or by education.
           (2) Income scores for non-working fathers are only made at the 
               preferred level of variation (occ x race x south).

    Creates: income scores at occ, occ x race, and occ x race x south levels
             All output files have the prefix "incomescores_fathers1950_"
*/

* Session setup: disable output paging, empty memory, and move to the
* Census data folder (root path comes from the global $Mydirectory1, which
* must be defined before this file is run).
set more off
clear
cd "$Mydirectory1/1_DataSources/CensusData/"

    * Load the 1950 fathers extract (file name suggests ages 30-50 -- confirm
    * against the script that builds it).
    use ./output/Census1950_fathers_ages30to50.dta, clear 

    * Constant helpers: "number" is summed later to count observations per
    * cell; "tag" only serves as a collapse target when building templates.
    generate number = 1
    generate tag = 1
    
*------------------------------------------------------------------------------------*
*------------------------------------------------------------------------------------*

*******************
*** TEMPLATES
*******************

* Build one "template" (skeleton of all possible cells) per level of variation:
*   byocc          : one row per occupation
*   byocc_byr      : occupation x race (race = 1, 2)
*   byocc_byr_bys  : occupation x race x south (south_merge = 0, 1)
* Occupation code 99 is removed from the two coarser templates and kept only
* in the occ x race x south template.
foreach tpl in byocc byocc_byr byocc_byr_bys {

    preserve

        if "`tpl'" != "byocc_byr_bys" {
            drop if fatheroccej==99
        }

        * One row per occupation present in the data
        collapse (rawsum) tag, by(fatheroccej)

        * Duplicate each occupation for the two race codes
        * (expand's indicator is 0/1, shifted here to 1/2)
        if "`tpl'" != "byocc" {
            expand 2, gen(race)
            replace race = race + 1
        }

        * Duplicate again for non-south (0) / south (1)
        if "`tpl'" == "byocc_byr_bys" {
            expand 2, gen(south_merge)
        }

        drop tag
        tempfile template_`tpl'
        save `template_`tpl''

    restore
}

*------------------------------------------------------------------------------------*
*------------------------------------------------------------------------------------*

global income_measures "inctot occscore"

*******************
*** COLLAPSE 
*******************

* For each level of variation, compute weighted cell means of the income
* measures plus the raw number of observations per cell, then merge the
* result onto the matching template so that empty cells are present
* (with a count of zero and missing means).
foreach lvl in byocc byocc_byr byocc_byr_bys byors_altwgt {

    * Cell definition
    if "`lvl'"=="byocc" {
        local cell "fatheroccej"
    }
    else if "`lvl'"=="byocc_byr" {
        local cell "fatheroccej race"
    }
    else {
        local cell "fatheroccej race south_merge"
    }

    * Weight: the alternative-weight run differs from byocc_byr_bys only here
    if "`lvl'"=="byors_altwgt" {
        local weight "wgt1950_hhsizeadj"
    }
    else {
        local weight "slwt"
    }

    preserve

        * Occupation 99 is kept only at the occ x race x south levels
        if inlist("`lvl'", "byocc", "byocc_byr") {
            drop if fatheroccej==99
        }
        summ fatheroccej

        collapse (rawsum) number (mean) $income_measures [aw=`weight'], by(`cell')

        foreach v of global income_measures {
            local scorevar avg_`v'_1950_`lvl'
            rename `v' `scorevar'
            label var `scorevar' "Coarse income score, average, 1950 using `v'"
        }

        tempfile cellmeans
        save `cellmeans'

        * Start from the full grid of cells and attach the computed means
        if "`lvl'"=="byors_altwgt" {
            use `template_byocc_byr_bys'
        }
        else {
            use `template_`lvl''
        }
        merge 1:1 `cell' using `cellmeans', nogen

        * Cells that exist only in the template have no observations
        local countvar number_1950obs_`lvl'
        rename number `countvar'
        replace `countvar' = 0 if `countvar'==.
        label var `countvar' "Number of obs in cell"

        tempfile incomescores_`lvl'
        save `incomescores_`lvl''

    restore

}


*------------------------------------------------------------------------------------*
*------------------------------------------------------------------------------------*

***************************
*** IMPUTATIONS (INCTOT)
***************************

* Combine all income-score files into one dataset at the finest level
* (occ x race x south); coarser scores are repeated across the finer cells.
    use `incomescores_byocc_byr_bys', clear

    * Same cells, alternative weight: every row must match
    merge 1:1 fatheroccej race south_merge using `incomescores_byors_altwgt'
    assert _merge==3
    drop _merge

    * Coarser levels: only occupation 99 may be absent from the using file
    foreach coarse in byocc_byr byocc {
        if "`coarse'"=="byocc_byr" {
            local keyvars "fatheroccej race"
        }
        else {
            local keyvars "fatheroccej"
        }
        merge m:1 `keyvars' using `incomescores_`coarse''
        assert fatheroccej==99 if _merge==1 
        drop _merge
    }

    * Drop unnecessary occscore variables (only the occupation-level one is kept)
    foreach g in byocc_byr_bys byocc_byr byors_altwgt {
        drop avg_occscore_1950_`g'
    }

    * Flag cells whose inctot score is missing and will be imputed;
    * occupation 99 is never flagged
    foreach g in byocc byocc_byr byocc_byr_bys byors_altwgt {
        gen flag_1950_`g' = (avg_inctot_1950_`g'==.) & (fatheroccej!=99)
        label var flag_1950_`g' "flag 1950 imputed cells at `g' level"   
    }
    assert flag_1950_byocc_byr_bys==flag_1950_byors_altwgt 

    * Impute at occ x race level
    *
    * The data in memory have one row per occ x race x south cell, so each
    * occ x race score appears on two rows (south_merge = 0 and 1). Comparing
    * adjacent rows within occupation would therefore (a) mix white/white and
    * black/black comparisons (ratio = 1) into the average racial gap, and
    * (b) impute the second black row from the already-imputed first black
    * row, dividing by the gap twice. To avoid both problems the white value
    * is broadcast to every row of the occupation, the gap is measured on
    * exactly one black row per occupation, and every missing black row is
    * imputed directly from the white value.

    local threshold "1"
    local number_byr "number_1950obs_byocc_byr"
    local cond "fatheroccej!=99"
    local inc_byr "avg_inctot_1950_byocc_byr"

    tempvar white_inc white_n one_row

    * White (race==1) income score and cell size, copied to all rows of the occupation
    bysort fatheroccej: egen double `white_inc' = max(cond(race==1, `inc_byr', .))
    bysort fatheroccej: egen double `white_n'   = max(cond(race==1, `number_byr', .))

    * Exactly one row per occ x race cell, so duplicated south rows are not double counted
    egen byte `one_row' = tag(fatheroccej race)

    * White/black income ratio, only for occupations where both race cells
    * have more than `threshold' observations
    gen double ratio = `white_inc' / `inc_byr'                          ///
        if race==2 & `one_row'                                          ///
        & `number_byr'>`threshold' & !missing(`number_byr')             ///
        & `white_n'>`threshold'    & !missing(`white_n')

    sum ratio [aw=`number_byr'], d //calculate average racial income gap across occupations
    local ratio2 = r(mean)

    * Missing black score: rescale the white score of the same occupation by the average gap
    replace `inc_byr' = `white_inc' / `ratio2' if `inc_byr'==. & race==2 & `cond'

    * Remove helpers; "ratio" stays in memory and is removed by the later -drop *ratio*-
    drop `white_inc' `white_n' `one_row'
    sort fatheroccej race 

 
    * Impute inctot at occ x race x south level, once for the standard-weight
    * score ("norm") and once for the alternative-weight score ("adj")
            /*Note: No imputations are necessary 
                    for fatheroccej==99. */

    * Both versions are built from the same observations, so one count suffices
    assert number_1950obs_byocc_byr_bys == number_1950obs_byors_altwgt
    drop number_1950obs_byors_altwgt

    * Within each occ x south group the rows are ordered race 1, then race 2
    sort fatheroccej south_merge race 
    local min_obs  "1"  
    local cellsize "number_1950obs_byocc_byr_bys"

    foreach wgtver in norm adj {

        if "`wgtver'" == "norm" {
            local score "avg_inctot_1950_byocc_byr_bys"
        }
        else {
            local score "avg_inctot_1950_byors_altwgt"
        }

        * Ratio of the race-1 score to the race-2 score within occ x south;
        * only use occs with sufficient obs in both race cells
        by fatheroccej south_merge: gen white_black_ratio = `score'[_n-1]/`score'[_n] if `cellsize'[_n-1]>`min_obs' & `cellsize'>`min_obs'

        forvalues region = 0/1 {
            
            /*Average racial income gap across occupation cells, computed
              separately for nonsouth (0) and south (1) */
            quietly sum white_black_ratio if south_merge==`region' [aw=`cellsize']
            local gap = r(mean)
            display `gap'
            
            * Occupations whose race-2 score is missing in this region
            levelsof fatheroccej if race==2 & south_merge==`region' & `score'==., local(needfill)

            /*Missing income for black respondents: rescale white income of the
              same occupation and region by the regional average gap */
            foreach occ of local needfill {
                display "occupation: `occ'"
                sum `score' if south_merge==`region' & race==1 & fatheroccej==`occ'
                replace `score' = `r(mean)' / `gap' if south_merge==`region' & race==2 & fatheroccej==`occ'  
            }
        }

        * Clear ratio helpers so the variable can be recreated on the next pass
        drop *ratio* 
    }

*------------------------------------------------------------------------------------*
*------------------------------------------------------------------------------------*
   
**************
* SAVE
**************
    * Write one output file per level of variation. The working dataset is at
    * occ x race x south level, so coarser levels are reduced to one row per cell.
    foreach lvl in byocc byocc_byr byocc_byr_bys  {

        local finest = ("`lvl'"=="byocc_byr_bys")

        if "`lvl'"=="byocc" {
            local cell "fatheroccej"
        }
        else if "`lvl'"=="byocc_byr" {
            local cell "fatheroccej race"
        }
        else {
            local cell "fatheroccej race south_merge"
        }

        preserve

            * Occupation 99 is only reported at the finest level
            if !`finest' {
                drop if fatheroccej==99 
            }

            bysort `cell': keep if _n==1

            if `finest' {
                * The finest file also carries the alternative-weight variables
                keep number_1950obs_`lvl' `cell' avg*`lvl' *_altwgt flag*`lvl'
                assert avg_inctot_1950_`lvl'!=. & avg_inctot_1950_byors_altwgt!=.
            }
            else {
                keep number_1950obs_`lvl' `cell' avg*`lvl' flag*`lvl'
                assert avg_inctot_1950_`lvl'!=.
            }

            summ fatheroccej 

            compress
            save ./output/incomescores_fathers1950_`lvl'.dta, replace

        restore

    }